Data filtering

Author

Stan Brouwer

Data filtering

This is the code used to filter and manage the output of the Xsense dot IMU data. The source code can be found on my github

Load the data

Lets start by defining a function to correctly load measurements:

Note that IMU’s do not start and stop measureing at the exact same time; even after synchronization the amount of elements per IMU (the length of measurement) differs. In my implementation of temporal relaignment I assumed that the time in SampletimeFine was synchronized, and excluded first or last elements accordingly to ensure dataframes are of equal length. This eases calculation since R prefers to calculate over lists of equal length.

Code
# Clean workspace and load dependencies
rm(list = ls())
library(tidyr)
library(dplyr)
library(plotly)
library(ggplot2)
library(knitr)
library(lubridate)
Code
LoadXsenseData <- function(nameofpp) {
  dir <- toString(nameofpp[2])
  hz <- as.numeric(nameofpp[3])
  skiprow <- as.numeric(nameofpp[4])
  #dir <- bart
  #hz <- 60

  files <- list.files(path = dir, full.names = TRUE)
  data <- list()

  # Read CSV files of each directory
  for (i in seq_along(files)) {
    data[[i]] <- read.csv(files[i], header = TRUE, skip = skiprow)
  }

  # Ensure all dataframes have the same number of rows
  min_rows <- min(sapply(data, nrow))
  data <- lapply(data, function(df) {
    df <- df[1:min_rows, , drop = FALSE]
    return(df)
  })

  # Adjust time
  for (i in seq_along(data)) {
    rows <- nrow(data[[i]])
    data[[i]]$TimeS <- ((1/hz) * (1:rows))
  }

  # Initialize toreturn data frame with time column
  toreturn <- data.frame(time = data[[1]]$TimeS)

  # Calculate absolute values
  for (i in 1:length(data)) {
    if ("FreeAcc_X" %in% names(data[[i]])) {
      col_name <- paste0("FreeAcc_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$FreeAcc_X^2 + data[[i]]$FreeAcc_Y^2 + data[[i]]$FreeAcc_Z^2)
    }
    if ("Acc_X" %in% names(data[[i]])) {
      col_name <- paste0("A_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$Acc_X^2 + data[[i]]$Acc_Y^2 + data[[i]]$Acc_Z^2)
    }
    if ("Gyr_X" %in% names(data[[i]])) {
      col_name <- paste0("Gyr_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$Gyr_X^2 + data[[i]]$Gyr_Y^2 + data[[i]]$Gyr_Z^2)
    }
  }

  # Order the attributes of the dataframe
  toreturn_sorted <- toreturn[, sort(names(toreturn))]

  return(toreturn_sorted)
}
Code
#! Here i defined what test subject refers to what directorty
#! should probably think about PID here later on. 
#! format: name, hz, skipheading

pp_info <- data.frame (
  #       name, file, hz, skiprow
  bart = c("bart", "../../Logs/old/20240429_163145_bart/", 60, 7),
  other = c("bart", "../../Logs/new/20240502_192335/", 60, 10)
)
Code
LoadXsenseData2 <- function(dir) {
  hz <- 60
  skiprow <- 7


  files <- list.files(path = dir, full.names = TRUE)
  data <- list()

  # Read CSV files of each directory
  for (i in seq_along(files)) {
    data[[i]] <- read.csv(files[i], header = TRUE, skip = skiprow)
  }

  # Ensure all dataframes have the same number of rows
  min_rows <- min(sapply(data, nrow))
  data <- lapply(data, function(df) {
    df <- df[1:min_rows, , drop = FALSE]
    return(df)
  })

  # Adjust time
  for (i in seq_along(data)) {
    rows <- nrow(data[[i]])
    data[[i]]$TimeS <- ((1/hz) * (1:rows))
  }

  # Initialize toreturn data frame with time column
  toreturn <- data.frame(time = data[[1]]$TimeS)

  # Calculate absolute values
  for (i in 1:length(data)) {
    if ("FreeAcc_X" %in% names(data[[i]])) {
      col_name <- paste0("FreeAcc_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$FreeAcc_X^2 + data[[i]]$FreeAcc_Y^2 + data[[i]]$FreeAcc_Z^2)
    }
    if ("Acc_X" %in% names(data[[i]])) {
      col_name <- paste0("A_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$Acc_X^2 + data[[i]]$Acc_Y^2 + data[[i]]$Acc_Z^2)
    }
    if ("Gyr_X" %in% names(data[[i]])) {
      col_name <- paste0("Gyr_abs", i)
      toreturn[[col_name]] <- sqrt(data[[i]]$Gyr_X^2 + data[[i]]$Gyr_Y^2 + data[[i]]$Gyr_Z^2)
    }
  }

  # Order the attributes of the dataframe
  toreturn_sorted <- toreturn[, sort(names(toreturn))]

  return(toreturn_sorted)
}

Visualization

lets also define some functions to visualize the data

Code
# Some functions to visualize the acceleration and the Gyr

plot_a <- function(df) {
  if ("A_abs1" %in% names(df)) {
  plot <- plot_ly(df, x = ~time, y = ~A_abs1, name = "marker1", type = "scatter", mode = "lines") %>%
          add_trace(y = ~A_abs2, name = "marker 2") %>%
          add_trace(y = ~A_abs3, name = "marker 3") %>%
          add_trace(y = ~A_abs4, name = "marker 4") %>%
          add_trace(y = ~A_abs5, name = "marker 5") %>%
          layout(title = "Absolute accelerations",
                 xaxis = list(title = "Time"),
                 yaxis = list(title = "A_abs Values")) |>
        bslib::card(full_screen = TRUE)
  }
    if ("FreeAcc_abs1" %in% names(df)) {
  plot <- plot_ly(df, x = ~time, y = ~FreeAcc_abs1, name = "marker1", type = "scatter", mode = "lines") %>%
          add_trace(y = ~FreeAcc_abs2, name = "marker 2") %>%
          add_trace(y = ~FreeAcc_abs3, name = "marker 3") %>%
          add_trace(y = ~FreeAcc_abs4, name = "marker 4") %>%
          add_trace(y = ~FreeAcc_abs5, name = "marker 5") %>%
          layout(title = "Absolute accelerations",
                 xaxis = list(title = "Time"),
                 yaxis = list(title = "FreeAcc_abs Values")) |>
        bslib::card(full_screen = TRUE)
  }
  
  return(plot)
}

plot_gyr <- function(df) {
  plot <- plot_ly(df, x = ~time, y = ~Gyr_abs1, name = "marker1", type = "scatter", mode = "lines") %>%
          add_trace(y = ~Gyr_abs2, name = "marker 2") %>%
          add_trace(y = ~Gyr_abs3, name = "marker 3") %>%
          add_trace(y = ~Gyr_abs4, name = "marker 4") %>%
          add_trace(y = ~Gyr_abs5, name = "marker 5") %>%
          layout(title = "Absolute Gyr",
                 xaxis = list(title = "Time"),
                 yaxis = list(title = "Gyr Values"))
  return(plot)
}


plot_all_columns <- function(df) {
  if (!"time" %in% names(df)) {
    stop("The dataframe must contain a 'time' column for the x-axis.")
  }
  plot <- plot_ly(df, x = ~time, type = 'scatter', mode = 'lines')
  for (col in setdiff(names(df), "time")) {
    plot <- plot %>% add_trace(y = df[[col]], name = col)
  }
  plot <- plot %>%
    layout(title = "All Columns Plot",
           xaxis = list(title = "Time"),
           yaxis = list(title = "Values"))
  return(plot)
}

#! Maybe include a plotting function that takes the dataframe and the attribute to plot, assuming 5 markers?

Initial test

Lets load some data and see how it looks:

note: to increase performance I stored the calculated values and read them. This is faster than calculating all absolute values each time the program runs

Code
# Storing the calculated data
  # meting1 <- LoadXsenseData(pp_info[1:3,1])
  # write.csv(meting1, file = "example1.csv", row.names = FALSE)

# Loading the calulated data
meting1 <- read.csv("example1.csv")

# Plot the calculated data
plot_a(meting1)
Code
plot_gyr(meting1)

This look absolutely great! that the absolute acceleration approaches gravitational constant very well! It is a tiny bit higher than the expected 9.8 due to the noise. Since the absolute is taken, all noise that is not in opposite direction of the gravity increases the measured acceleration at rest. I’m overwhelmed by the precision of the IMU’s here!

Data clipping

The protocol was such that the barbell IMU only moved while the participant was executing a jerk, or while the barbell was being loaded. Thus, the barbell IMU data seems a wise place to identify the moments at which the jerk was executed.

Letst start by calculating the range of the baseline acceleration that we measured between t = 900 and t = 1100

Code
# Find and print the minimum and maximum values from the range t = 900 - 1100
cat("minimum value of A_abs: ", min(meting1[meting1$time >= 900 & meting1$time <= 1100, ]))
minimum value of A_abs:  0.02195406
Code
cat("maximum value of A_abs: ",max(meting1[meting1$time >= 900 & meting1$time <= 1100, ]))
maximum value of A_abs:  1100

Lets increase the interval slightly so that we can use it to automatically determine where movement occurs. We could exclude all cases where the barbell IMU measures an A_abs within the 8.5 - 11.2 inteval.However, when the barbell accelerates or decelerates the A_abs crosses the interval. Lets be conservative and assume that if it crosses the the interval, it does so for less than 360 elements (6 seconds! highly conservative but it works just fine!)

Code
# New variable to work with
meting_filtered <- meting1

# Compute the run-length encoding
rle_sequence <- rle(meting_filtered$A_abs1 >= 8.5 & meting_filtered$A_abs1 <= 11.2)

# Identify the start and end indices of consecutive sequences where condition is TRUE
start_indices <- cumsum(rle_sequence$lengths) - rle_sequence$lengths + 1
end_indices <- cumsum(rle_sequence$lengths)

# Identify the consecutive sequences where the condition holds for more than 360 rows
condition_indices <- which(rle_sequence$values & rle_sequence$lengths >= 360)

# Iterate over the consecutive sequences and replace the values of A_abs2 with NA
for (i in condition_indices) {
  start_index <- start_indices[i]
  end_index <- end_indices[i]
  meting_filtered$A_abs1[start_index:end_index] <- NA
  meting_filtered$A_abs2[start_index:end_index] <- NA
  meting_filtered$A_abs3[start_index:end_index] <- NA
  meting_filtered$A_abs4[start_index:end_index] <- NA
  meting_filtered$A_abs5[start_index:end_index] <- NA
}

plot_a(meting_filtered)
Code
rm(meting_filtered, rle_sequence, start_indices, end_indices, start_index, end_index, condition_indices, i)

The barbell IMU also registers acceleration while it is loaded. Since the subject was asked to sit still when resting, and the bar was only loaded while resting, it seems safe to assume that the IMU on the lower leg remained stationary (but with a more variable baseline) when no lift was exercised.

After visual inspection the interval of 8.5 - 11.2 seemed to suffice, again under the assumption of 360 elements.

Due to my misunderstanding of the IMU’s configuration, some measurements measure the FreeAcceleration and others measure the Acceleration. Difference being whether the gravitation is accounted for. To ensure te function works in both cases, the interval is decreased by 9.81 if the dataframe contains a attribute named FreeAcc, indicating that gravity is not measured. This way the function should work in most cases.

Code
filterdata <- function(meting) {
  # All columns where the conditions as described are true are removed. However, time is unchanged, since replancements work on indices. If an entire colum where to be removed, this would cause errors when plotting the data
  
  # Calculate run-length encoding when gravity is measured
  if ("A_abs1" %in% names(meting)) {
    rle_sequence_A_abs1 <- rle(meting$A_abs1 >= 9.2 & meting$A_abs1 <= 10.3)
    rle_sequence_A_abs2 <- rle(meting$A_abs2 >= 9.0 & meting$A_abs2 <= 11.5)
    rle_sequence_A_abs3 <- rle(meting$A_abs3 >= 9.0 & meting$A_abs3 <= 11.5)
    rle_sequence_A_abs4 <- rle(meting$A_abs4 >= 8.0 & meting$A_abs4 <= 11.5)
    rle_sequence_A_abs5 <- rle(meting$A_abs5 >= 7.2 & meting$A_abs5 <= 15.7)
  }
  # Calculate run-length encoding when gravity is not measured
  if ("FreeAcc_abs1" %in% names(meting)) {
    rle_sequence_A_abs1 <- rle(meting$A_abs1 >= (9.2-9.8) & meting$A_abs1 <= (10.3-9.8))
    rle_sequence_A_abs2 <- rle(meting$A_abs2 >= (9.0-9.8) & meting$A_abs2 <= (11.5-9.8))
    rle_sequence_A_abs3 <- rle(meting$A_abs3 >= (9.0-9.8) & meting$A_abs3 <= (11.5-9.8))
    rle_sequence_A_abs4 <- rle(meting$A_abs4 >= (8.0-9.8) & meting$A_abs4 <= (11.5-9.8))
    rle_sequence_A_abs5 <- rle(meting$A_abs5 >= (7.2-9.8) & meting$A_abs5 <= (15.7-9.8))
  }
  
  # Identify the start and end indices
  #! Might write a loop for this later on (Everything up until the functions return can be looped!)
  start_indices_A_abs1 <- cumsum(rle_sequence_A_abs1$lengths) - rle_sequence_A_abs1$lengths + 1
  end_indices_A_abs1 <- cumsum(rle_sequence_A_abs1$lengths)
  start_indices_A_abs2 <- cumsum(rle_sequence_A_abs2$lengths) - rle_sequence_A_abs2$lengths + 1
  end_indices_A_abs2 <- cumsum(rle_sequence_A_abs2$lengths)
  start_indices_A_abs3 <- cumsum(rle_sequence_A_abs3$lengths) - rle_sequence_A_abs3$lengths + 1
  end_indices_A_abs3 <- cumsum(rle_sequence_A_abs3$lengths)
  start_indices_A_abs4 <- cumsum(rle_sequence_A_abs4$lengths) - rle_sequence_A_abs4$lengths + 1
  end_indices_A_abs4 <- cumsum(rle_sequence_A_abs4$lengths)
  start_indices_A_abs5 <- cumsum(rle_sequence_A_abs5$lengths) - rle_sequence_A_abs5$lengths + 1
  end_indices_A_abs5 <- cumsum(rle_sequence_A_abs5$lengths)
  
  # Identify more than 360 consecutive indices
  condition_indices_A_abs1 <- which(rle_sequence_A_abs1$values & rle_sequence_A_abs1$lengths >= 300)
  condition_indices_A_abs2 <- which(rle_sequence_A_abs2$values & rle_sequence_A_abs2$lengths >= 360)
  condition_indices_A_abs3 <- which(rle_sequence_A_abs3$values & rle_sequence_A_abs3$lengths >= 360)
  condition_indices_A_abs4 <- which(rle_sequence_A_abs4$values & rle_sequence_A_abs4$lengths >= 360)
  condition_indices_A_abs5 <- which(rle_sequence_A_abs5$values & rle_sequence_A_abs5$lengths >= 800)
  
  # Replace with NA if condition A_abs1 = true
  for (i in condition_indices_A_abs1) {
    start_index <- start_indices_A_abs1[i]
    end_index <- end_indices_A_abs1[i]
  meting[start_index:end_index, !(names(meting) %in% "time")] <- NA
  }
    # Replace with NA if condition A_abs2 = true
  for (i in condition_indices_A_abs2) {
    start_index <- start_indices_A_abs2[i]
    end_index <- end_indices_A_abs2[i]
  meting[start_index:end_index, !(names(meting) %in% "time")] <- NA
  }
  # Replace with NA if condition A_abs3 = true
  for (i in condition_indices_A_abs3) {
    start_index <- start_indices_A_abs3[i]
    end_index <- end_indices_A_abs3[i]
  meting[start_index:end_index, !(names(meting) %in% "time")] <- NA
  }
  # Replace with NA if condition A_abs4 = true
  for (i in condition_indices_A_abs4) {
    start_index <- start_indices_A_abs4[i]
    end_index <- end_indices_A_abs4[i]
  meting[start_index:end_index, !(names(meting) %in% "time")] <- NA
  }
# Replace with NA if condition A_abs5 = true
for (i in condition_indices_A_abs5) {
  start_index <- start_indices_A_abs5[i]
  end_index <- end_indices_A_abs5[i]
  meting[start_index:end_index, !(names(meting) %in% "time")] <- NA
}
  meting <- meting[-1, ]
  return(meting)
}

Lets call the function

Code
# Call the function with your dataframe as argument
filtered1 <- filterdata(meting1)
plot_a(filtered1)

The assumptions for this automatic filter might be a little conservative, but overall the filter seems to work pretty well. When compared to the raw files it might just keep a little to much. When looking for the maximum absolute values this would work perfect, but when looking at average values this method might not be precise enough. Then we would need to A. increase precision or B. decide a start and endpoint for the lift manualy.

Now lets put each lift in a separate dataframe.

Code
separatelifts <- function(filtered) {
  # Identify continuous NA portions
  na_ranges <- cumsum(is.na(filtered$A_abs1))
  # Split dataframe based on NA
  na_segments <- split(filtered, na_ranges)
  # Remove NA segments
  valid_segments <- na_segments[!sapply(na_segments, function(x) all(is.na(x$A_abs1)))]
  # Optional: Rename the dataframes for clarity
  names(valid_segments) <- paste0(seq_along(valid_segments))
  return(valid_segments)
}

Lets call the function

Code
filtered2 <- separatelifts(filtered1)
Code
plot_filtered_subplots <- function(filtered2) {
  plots <- list()
  for (i in seq_along(filtered2)) {
    plots[[i]] <- plot_ly(filtered2[[i]], x = ~time, y = ~A_abs1, name = paste0(i, " marker 1"), type = "scatter", mode = "lines") %>%
          add_trace(y = ~A_abs2, name = paste0(i, " marker 2")) %>%
          add_trace(y = ~A_abs3, name = paste0(i, " marker 3")) %>%
          add_trace(y = ~A_abs4, name = paste0(i, " marker 4")) %>%
          add_trace(y = ~A_abs5, name = paste0(i, " marker 5")) %>%
          layout(title = "Absolute accelerations",
                 height = 700,
                 xaxis = list(title = "Time"),
                 yaxis = list(title = "A_abs Values"))
  }
  return(subplot(plots, nrows = length(filtered2)))
}

Lets call the function

Code
plot_filtered_subplots(filtered2)

This is great! As expected our filter is a bit conservative. It is hard to identify the exact start of the lift mathematicly. However, when we visualise the data a clear dip in acceleration can be seen just before the peaks. This should be the start of the lift - the dip - where the subject moves downward before propelling the bar upwards. This can be seen at T=31 for the firts lift, at t=192.5 for the second lift, and t=470 for the last lift.

Obviously, the lower leg IMU moves down the least. The upper leg IMU moves down a little, and the other IMU’s move down substantially more. selecting only marker 1, 3 and 5 should make the distinction even clearer

Manual definition

Next chapter will describe how and what start and endpoints for each lift are selected.